package com.rapidminer.krimp;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.logging.Level;

import com.rapidminer.krimp.comparators.StandardCoverComparator;
import com.rapidminer.operator.ResultObjectAdapter;
import com.rapidminer.operator.learner.associations.FrequentItemSet;
import com.rapidminer.operator.learner.associations.Item;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.Tools;

/**
 * This class models a code table. A static method returns the standard code
 * table for a given database, which can be used to create new code tables, by
 * adding/removing itemsets to the given code table.
 * 
 * @author Siby
 * 
 */
public class CodeTable extends ResultObjectAdapter implements
		Iterable<FrequentItemSet>, Cloneable {

	private static final long serialVersionUID = -2576984284405664369L;

	/** Natural logarithm of 2, used to convert natural logarithms to bits. */
	private static final double LN_2 = Math.log(2);

	/** Maps every itemset of the coding set to its usage count. */
	private final SortedMap<FrequentItemSet, Integer> usage;

	/** Sum of all usage counts stored in {@link #usage}. */
	private final int usageSum;

	/** Sum of the supports of the singleton itemsets. */
	private final int suppSum;

	/** Cached total compressed size L(D,CT) in bits. */
	private final double compressedSize;

	/**
	 * Creates a code table from already computed values. The given map is
	 * stored as is (no copy is made).
	 * 
	 * @param usage
	 *            the usage function of the code table
	 * @param usageSum
	 *            the sum of all usages
	 * @param suppSum
	 *            the sum of all supports of singleton itemsets
	 * @param compressedSize
	 *            the total compressed size belonging to these values
	 */
	private CodeTable(SortedMap<FrequentItemSet, Integer> usage, int usageSum,
			int suppSum, double compressedSize) {
		this.usage = usage;
		this.suppSum = suppSum;
		this.usageSum = usageSum;
		this.compressedSize = compressedSize;
	}

	/**
	 * Creates a standard code table whose elements are sorted to the standard
	 * cover order. It contains one singleton itemset for every item of the
	 * database with a frequency greater than zero; the usage of each singleton
	 * is the frequency of its item.
	 * 
	 * @param db
	 *            the database that is supposed to be encoded
	 * @return the standard code table for the given database
	 */
	public static CodeTable getStandardCodeTable(Database db) {
		// For the standard code table usage sum and support sum coincide.
		int usageSum = db.getNumberOfStoredItems();
		SortedMap<FrequentItemSet, Integer> usage = new TreeMap<FrequentItemSet, Integer>(
				new StandardCoverComparator());
		for (Item item : db.getItems()) {
			int frequency = item.getFrequency();
			if (frequency > 0) {
				FrequentItemSet itemSet = new FrequentItemSet();
				itemSet.addItem(item, frequency);
				usage.put(itemSet, frequency);
			}
		}
		return new CodeTable(usage, usageSum, usageSum, compressedSize(usage,
				usageSum, usageSum));
	}

	/**
	 * Returns the standard code table for this code table, which is used to
	 * encode the left side of the code table. The singletons are collected by
	 * walking backwards from the last entry of this table for as long as the
	 * entries consist of exactly one item. An empty code table yields an empty
	 * standard code table.
	 * 
	 * @return the standard code table for this code table
	 */
	public CodeTable getStandardCodeTable() {
		// Same ordering as every other code table created by this class.
		SortedMap<FrequentItemSet, Integer> usageST = new TreeMap<FrequentItemSet, Integer>(
				new StandardCoverComparator());
		// Shrinking head views avoid both lastKey() on an empty map and a
		// down-cast of the map to a concrete implementation.
		SortedMap<FrequentItemSet, Integer> remaining = usage;
		while (!remaining.isEmpty()) {
			FrequentItemSet singleton = remaining.lastKey();
			if (singleton.getNumberOfItems() != 1) {
				break;
			}
			usageST.put(singleton, singleton.getFrequency());
			remaining = remaining.headMap(singleton);
		}
		return new CodeTable(usageST, suppSum, suppSum, compressedSize(usageST,
				suppSum, suppSum));
	}

	/**
	 * Creates a new code table with a coding set that contains additionally to
	 * the given coding set the newItemSet. Since this new code table can be
	 * created accordingly to the information about this code table, this method
	 * is supposed to be faster than the creation of a new code table with a new
	 * coding set. Only transactions that contain all items of the new itemset
	 * are covered again.
	 * 
	 * @param newItemSet
	 *            the item set to be added to the code table
	 * @param db
	 *            the database
	 * @return a new code table with the added item set, or this code table if
	 *         it already contains the item set
	 */
	public CodeTable addToCodingSet(FrequentItemSet newItemSet, Database db) {
		if (usage.containsKey(newItemSet)) {
			return this;
		}
		SortedMap<FrequentItemSet, Integer> newUsage = new TreeMap<FrequentItemSet, Integer>(
				new StandardCoverComparator());
		newUsage.putAll(usage);
		newUsage.put(newItemSet, 0);
		int newUsageSum = usageSum;
		for (Set<Item> transaction : db) {
			if (!transaction.containsAll(newItemSet.getItems())) {
				continue;
			}
			int frequency = db.getFrequency(transaction);
			// cover() consumes the set it is given, hence the copies.
			Set<FrequentItemSet> oldCover = cover(new HashSet<Item>(
					transaction), usage);
			Set<FrequentItemSet> newCover = cover(new HashSet<Item>(
					transaction), newUsage);
			// Withdraw the contribution of the old cover ...
			for (FrequentItemSet x : oldCover) {
				newUsage.put(x, newUsage.get(x) - frequency);
				newUsageSum -= frequency;
			}
			// ... and account for the cover including the new itemset.
			for (FrequentItemSet x : newCover) {
				newUsage.put(x, newUsage.get(x) + frequency);
				newUsageSum += frequency;
			}
		}
		double newCompressedSize = compressedSize(newUsage, newUsageSum,
				suppSum);
		return new CodeTable(newUsage, newUsageSum, suppSum, newCompressedSize);
	}

	/**
	 * Creates a new code table which is equal to this code table except that
	 * the specified itemset is not contained. Since this new code table can be
	 * created accordingly to the information about this code table, this method
	 * is supposed to be faster than the creation of a new code table with a new
	 * coding set. The usage of the removed itemset is added to every itemset
	 * of the remaining table that is used to cover the items of the removed
	 * itemset.
	 * 
	 * @param delItemSet
	 *            the item set to be deleted in the new code table
	 * @return a new code table where the specified itemset is not contained,
	 *         or this code table if it does not contain the item set
	 */
	public CodeTable removeFromCodingSet(FrequentItemSet delItemSet) {
		Integer storedUsage = usage.get(delItemSet);
		if (storedUsage == null) {
			return this;
		}
		int removedUsage = storedUsage;
		SortedMap<FrequentItemSet, Integer> newUsage = new TreeMap<FrequentItemSet, Integer>(
				new StandardCoverComparator());
		newUsage.putAll(usage);
		newUsage.remove(delItemSet);
		int newUsageSum = usageSum - removedUsage;
		Set<FrequentItemSet> delItemSetCover = cover(new HashSet<Item>(
				delItemSet.getItems()), newUsage);
		for (FrequentItemSet x : delItemSetCover) {
			newUsage.put(x, newUsage.get(x) + removedUsage);
			newUsageSum += removedUsage;
		}
		double newCompressedSize = compressedSize(newUsage, newUsageSum,
				suppSum);
		return new CodeTable(newUsage, newUsageSum, suppSum, newCompressedSize);
	}

	/**
	 * Computes the sets of a code table that are used to cover the transaction.
	 * The code table is represented by the usage function, whose keys are
	 * tried in their iteration order. Note that the given transaction set is
	 * modified: the items of every chosen itemset are removed from it.
	 * 
	 * @param transaction
	 *            a transaction from the database; it is consumed by this method
	 * @param usage
	 *            the usage function of a code table
	 * @return the itemsets contained in the domain of the usage function that
	 *         are used to cover the given transaction
	 */
	private static Set<FrequentItemSet> cover(Set<Item> transaction,
			SortedMap<FrequentItemSet, Integer> usage) {
		Set<FrequentItemSet> res = new HashSet<FrequentItemSet>();
		for (FrequentItemSet candidate : usage.keySet()) {
			if (transaction.isEmpty()) {
				// Everything is covered, no need to look any further.
				break;
			}
			if (transaction.containsAll(candidate.getItems())) {
				res.add(candidate);
				transaction.removeAll(candidate.getItems());
			}
		}
		return res;
	}

	/**
	 * Computes the total compressed size of the encoded database and the code
	 * table described by the committed parameters, L(D,CT), in bits, as
	 * described in van Leeuwen, M. Patterns that Matter, Ph.D. dissertation,
	 * Universiteit Utrecht, 2010, p15. Itemsets with a usage of zero do not
	 * contribute to the size.
	 * 
	 * @param usage
	 *            a map from itemsets of the code table to their computed usages
	 * @param usageSum
	 *            the sum of all usages of items in the code table
	 * @param suppSum
	 *            the sum of all supports of singleton itemsets
	 * @return the compressed size L(D,CT)
	 */
	private static double compressedSize(
			SortedMap<FrequentItemSet, Integer> usage, int usageSum, int suppSum) {
		double size = 0;
		for (Map.Entry<FrequentItemSet, Integer> entry : usage.entrySet()) {
			int count = entry.getValue();
			if (count == 0) {
				continue;
			}
			// "count" code words in the database plus one in the code table.
			size += (count + 1)
					* Math.log((double) usageSum / (double) count);
			// Left side of the code table: the items of the itemset.
			for (Item item : entry.getKey().getItems()) {
				size += Math.log((double) suppSum
						/ (double) item.getFrequency());
			}
		}
		return size / LN_2;
	}

	/**
	 * Returns the total compressed size of the encoded database and the code
	 * table, in bits, as described in van Leeuwen, M. Patterns that Matter,
	 * Ph.D. dissertation, Universiteit Utrecht, 2010, p15.
	 * 
	 * @return the total compressed size of the encoded database and the code
	 *         table
	 */
	public double getCompressedSize() {
		return compressedSize;
	}

	/**
	 * Returns the coding set of this code table as a new set that is sorted
	 * with a {@link StandardCoverComparator}.
	 * 
	 * @return the coding set of this code table
	 */
	public SortedSet<FrequentItemSet> getItemSets() {
		SortedSet<FrequentItemSet> returnSet = new TreeSet<FrequentItemSet>(
				new StandardCoverComparator());
		returnSet.addAll(usage.keySet());
		return returnSet;
	}

	/**
	 * Returns the size of <code>itemSet</code> when it is encoded by the
	 * standard code table. Corresponds to the size of the entry in the left
	 * side of the code table.
	 * 
	 * @param itemSet
	 *            an itemset of the code table
	 * @return the code length of the encoded itemset with the standard code
	 *         table, in bits
	 */
	public double getCodeLengthLeft(FrequentItemSet itemSet) {
		double size = 0;
		for (Item item : itemSet.getItems()) {
			size += Math.log((double) suppSum / (double) item.getFrequency());
		}
		return size / LN_2;
	}

	/**
	 * Returns the optimal code length of a specific itemset from this code
	 * table or -1 if the itemset is not contained in this code table. An
	 * itemset with a usage of zero has a code length of 0.
	 * 
	 * @param itemSet
	 *            the itemSet to be encoded by this code table
	 * @return the relative length of the optimal code for this itemset
	 */
	public double getCodeLengthRight(FrequentItemSet itemSet) {
		Integer storedUsage = usage.get(itemSet);
		if (storedUsage == null) {
			return -1;
		}
		int count = storedUsage;
		if (count == 0) {
			return 0;
		}
		return Math.log((double) usageSum / (double) count) / LN_2;
	}

	/**
	 * Returns the number of entries for this code table.
	 * 
	 * @return the number of stored itemsets for this code table
	 */
	public int getNumberOfEntrys() {
		return usage.size();
	}

	/**
	 * Returns the usage of an itemset, i.e. the number of transactions where
	 * the specified itemset is used for encoding. If the itemset is not part
	 * of this code table a warning is logged and -1 is returned.
	 * 
	 * @param itemSet
	 *            an itemset that is supposed to be contained inside the code
	 *            table
	 * @return the usage of the itemset, or -1 if it is not contained
	 */
	public int getUsage(FrequentItemSet itemSet) {
		Integer storedUsage = usage.get(itemSet);
		if (storedUsage != null) {
			return storedUsage;
		}
		LogService
				.getRoot()
				.log(Level.WARNING,
						"The usage of itemsets that are not contained inside the code table is not defined, return -1.");
		return -1;
	}

	/**
	 * Returns an iterator over the itemsets of this code table in the order of
	 * the underlying usage map.
	 * 
	 * @return an iterator over the coding set
	 */
	@Override
	public Iterator<FrequentItemSet> iterator() {
		return usage.keySet().iterator();
	}

	/**
	 * Returns a textual listing of this code table: a header with the number
	 * of entries followed by one line per itemset showing its items, its
	 * frequency and its usage.
	 */
	@Override
	public String toString() {
		StringBuilder output = new StringBuilder("Code Table (" + usage.size()
				+ "):" + Tools.getLineSeparator());
		for (Map.Entry<FrequentItemSet, Integer> entry : usage.entrySet()) {
			FrequentItemSet set = entry.getKey();
			output.append(set.getItemsAsString());
			output.append(" / ");
			output.append(Tools.formatNumber(set.getFrequency()));
			output.append(" / ");
			output.append(Tools.formatNumber(entry.getValue()));
			output.append(Tools.getLineSeparator());
		}
		return output.toString();
	}

	@Override
	public String toResultString() {
		return toString();
	}

	/**
	 * Returns a new code table with a shallow copy of the usage map (same
	 * ordering, same itemset instances) and the same sums and compressed size.
	 */
	@Override
	public Object clone() {
		// TreeMap(SortedMap) keeps the ordering of the source map, so neither
		// an unchecked cast nor a cast to the concrete map type is needed.
		return new CodeTable(new TreeMap<FrequentItemSet, Integer>(usage),
				usageSum, suppSum, compressedSize);
	}

	/**
	 * Removes unused item sets (sets with usage 0). Should only be called at
	 * the end of the code table computation.
	 */
	public void cleanUp() {
		// Removing through the iterator of the value view deletes the whole
		// entry and needs no temporary copy of the key set.
		for (Iterator<Integer> it = usage.values().iterator(); it.hasNext();) {
			if (it.next() == 0) {
				it.remove();
			}
		}
	}

}